{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第7章"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pandas is imported here so the cell runs on a fresh kernel; the only\n",
    "# other pandas import in this notebook lives in the very last cell.\n",
    "import pandas as pd\n",
    "from sklearn.datasets import load_breast_cancer\n",
    "\n",
    "bc = load_breast_cancer()\n",
    "y = bc.target\n",
    "# Feature matrix as a DataFrame, one column per entry of bc.feature_names.\n",
    "X = pd.DataFrame.from_records(data=bc.data, columns=bc.feature_names)\n",
    "\n",
    "# Combined features + label frame. assign() returns a new DataFrame, so X\n",
    "# keeps only the feature columns instead of silently gaining 'target'.\n",
    "df = X.assign(target=y)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第10章--可用第11章的方法会更好吧"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pandas is imported here so the cell runs on a fresh kernel; the only\n",
    "# other pandas import in this notebook lives in the very last cell.\n",
    "import pandas as pd\n",
    "from sklearn.datasets import load_iris\n",
    "\n",
    "data = load_iris()\n",
    "# Feature matrix as a DataFrame, one column per entry of data.feature_names.\n",
    "X = pd.DataFrame.from_records(data=data.data, columns=data.feature_names)\n",
    "\n",
    "# Combined features + label frame. assign() returns a new DataFrame, so X\n",
    "# keeps only the feature columns instead of silently gaining 'target'.\n",
    "df = X.assign(target=data.target)\n",
    "df.shape\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第11章"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imported here so the cell runs on a fresh kernel (the other numpy and\n",
    "# pandas imports in this notebook appear in later cells).\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "np.random.seed(42)\n",
    "# Angles from -180 to 55 degrees in 5-degree steps, converted to radians.\n",
    "x = np.arange(-180, 60, 5) * np.pi / 180\n",
    "# Cosine curve plus Gaussian noise (mean 0, standard deviation 0.15).\n",
    "y = np.cos(x) + np.random.normal(0, 0.15, len(x))\n",
    "data = pd.DataFrame(np.column_stack([x, y]), columns=['x', 'y'])\n",
    "\n",
    "# Upper bound for range() below, so it is exclusive: the highest power\n",
    "# actually generated is pow_max - 1.\n",
    "pow_max = 13\n",
    "# Add one column per power of x: x_2, x_3, ..., x_12.\n",
    "for i in range(2, pow_max):\n",
    "    colname = 'x_%d' % i\n",
    "    data[colname] = data['x'] ** i\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_iris\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# Iris is a three-class dataset.\n",
    "X, y = load_iris(return_X_y=True)\n",
    "\n",
    "# Fit with the liblinear solver and the one-vs-rest multiclass strategy.\n",
    "clf = LogisticRegression(\n",
    "    solver='liblinear',\n",
    "    multi_class='auto',\n",
    "    random_state=42,\n",
    ").fit(X, y)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第13章"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "np.random.seed(42)\n",
    "# n equal steps across [0, 1], which yields n + 1 = 501 sample points\n",
    "# (both endpoints are included).\n",
    "n = 500\n",
    "X = np.arange(n + 1) / n\n",
    "# Targets are y = x plus Gaussian noise with standard deviation 0.1\n",
    "# (variance 0.01). Drawing all samples in one call consumes the seeded\n",
    "# generator in the same order as drawing them one at a time.\n",
    "y = X + np.random.normal(scale=0.1, size=X.shape)\n",
    "\n",
    "# Hold out 30% of the samples for testing; the split itself is seeded.\n",
    "X_train, X_test, y_train, y_test = train_test_split(X,\n",
    "                                                    y,\n",
    "                                                    test_size=0.3,\n",
    "                                                    random_state=42)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import make_moons\n",
    "\n",
    "# Generate the make_moons toy dataset: n samples, noise level 0.2, and a\n",
    "# fixed random_state so repeated runs give the same points.\n",
    "n = 200\n",
    "X, y = make_moons(n_samples=n, noise=0.2, random_state=42)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第14章"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_breast_cancer\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Load the breast-cancer data directly as (features, labels) arrays.\n",
    "X, y = load_breast_cancer(return_X_y=True)\n",
    "\n",
    "# Keep 30% of the samples as a test set; random_state fixes the split.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.3, random_state=42\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(569,)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the shape of y; relies on y being defined by an earlier cell.\n",
    "y.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.09617567, -0.0868145 ,  0.14069064, -0.53918396, -0.9385029 ,\n",
       "        0.60045423,  0.17581104,  0.9084849 , -1.40242994,  0.94769138,\n",
       "       -1.55339643,  1.24974237, -0.21363054,  0.59670874, -1.00614206,\n",
       "       -1.24015771, -0.77410559, -0.92496101,  0.11054746,  1.19347243,\n",
       "       -0.72893452, -1.03266264,  0.54481386, -1.47555233, -1.66766696,\n",
       "        0.57061154, -0.00934854,  0.26122379,  0.71933342,  1.86510526,\n",
       "        0.27871475, -1.59072971, -0.0287742 ,  0.37780734, -0.97304305,\n",
       "        0.53184703,  0.83703113, -0.80406863, -0.34316963,  0.56170776,\n",
       "       -2.33972493, -0.44952508, -0.60698045, -0.22866246,  0.11901435,\n",
       "       -0.97940484, -0.37973383, -0.67555853,  0.03774569, -0.53144638,\n",
       "        0.75276555,  0.53794207, -0.65888767,  0.8545792 ,  0.33677262,\n",
       "        0.26053778,  0.12933568, -1.41321018, -0.53840309, -0.06292151,\n",
       "       -0.12327421,  0.23046947,  1.27429653,  1.03106303, -0.64105064,\n",
       "       -0.27217573,  0.19677697,  0.02642418,  1.11538722, -1.33829012,\n",
       "        0.49920385, -0.53848357,  2.05591264, -1.01302818,  1.36153763,\n",
       "       -1.92416538,  0.72329563, -0.61398797, -0.53549807, -1.37977353,\n",
       "        2.00093701,  0.44378903,  0.65321192,  0.44226905,  0.71434957,\n",
       "       -1.93925185, -0.20129571, -0.30608957,  0.93213593, -1.0682046 ,\n",
       "        1.48011843, -0.9913619 , -1.52477073,  0.05822968,  1.50972651,\n",
       "        0.01827199,  0.60979505,  0.52768247, -0.44532122,  0.18523935])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# Normal distribution: draw 100 samples with mean 0 and standard\n",
    "# deviation 1.0. No seed is set in this cell, so each run differs.\n",
    "np.random.normal(size=100, scale=1.0, loc=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
